The purpose of this document is to benchmark the big data systems against each other. Here are the properties of the machine that is benchmarking the code.
sysctl -n machdep.cpu.brand_string
printf -v a "Number of processors: %s" $(sysctl -n hw.ncpu)
echo $a
## Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz
## Number of processors: 8
We will install the needed packages from the other analyses. Even though a package “bench” exists, we will use the package “microbenchmark” due to it not being tidy-dependent. Also, “bench” requires the output of the benchmarked functions to return the exact same object, which is a bit overly strict for our purposes.
# Global options: CRAN mirror for install.packages(), suppress warnings at
# the top level (warn = -1), and keep strings as characters when building
# data frames (a no-op default since R 4.0, kept for older sessions).
options(repos = "https://cran.rstudio.com/",
        warn = -1,
        stringsAsFactors = FALSE)
# packages currently installed on this machine
cur_pkgs <- rownames(installed.packages())
# packages required by the analyses in this document
req_pkgs <- c("tidyverse",
              "here",
              "vroom",
              "magrittr",
              "data.table",
              "microbenchmark")
# determine packages that are missing
miss_pkgs <- setdiff(x = req_pkgs,
                     y = cur_pkgs)
# install missing packages (explicit length check instead of relying
# on integer truthiness in the if() condition)
if (length(miss_pkgs) > 0) {
  install.packages(miss_pkgs)
}
# load all of the relevant packages
library(magrittr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::extract() masks magrittr::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(microbenchmark)
Let’s also put a function that will run the benchmark for everything and suppress warnings/messages.
#' Run a microbenchmark over the supplied expressions and return useful views.
#'
#' @param ... Unevaluated expressions to benchmark; forwarded verbatim to
#'   microbenchmark::microbenchmark(), so the printed expr labels are the
#'   calls exactly as written by the caller.
#' @param unit Time unit for the default summary (e.g. "ms").
#' @return A list with: benchmark (raw microbenchmark object), default
#'   (summary in `unit`), relative (summary relative to the fastest
#'   expression), and plot (autoplot of the benchmark).
run_benchmark <- function(..., unit = "ms"){
  # fail fast if the package is missing; require() would only return FALSE
  if (!requireNamespace("microbenchmark", quietly = TRUE)) {
    stop("Package 'microbenchmark' is required for run_benchmark().",
         call. = FALSE)
  }
  # capture this call and re-head it so the same unevaluated expressions
  # are handed straight to microbenchmark(); namespacing the head means
  # the package does not have to be attached
  cur_call <- match.call()
  cur_call[[1]] <- quote(microbenchmark::microbenchmark)
  # evaluate in the caller's frame so the expressions see the caller's
  # variables; suppress the chatty messages/warnings some readers emit
  bnch <- suppressWarnings(
    suppressMessages(
      eval(cur_call, envir = parent.frame())
    )
  )
  # creating list of benchmark/summary/plot
  list(benchmark = bnch,
       default = summary(bnch, unit = unit),
       relative = summary(bnch, unit = "relative"),
       plot = autoplot(bnch))
}
Let’s also put a function that will plot benchmarks if we are combining a bunch of benchmarks in a list.
#' Combine a list of benchmark results into one dodged bar chart.
#'
#' @param bnch_list List whose elements each contain a `benchmark` component
#'   (as produced by run_benchmark()).
#' @param id Name of the column identifying each list element in the plot.
#' @param unit Unit passed to summary() for each benchmark.
#' @param use Name of the summary column plotted on the y axis.
#' @return A ggplot object (dodged bar chart).
plot_benchmarks <- function(bnch_list,
                            id = "percent_missing",
                            unit = "relative",
                            use = "median"){
  # pull the raw benchmark out of each element, then summarise each one
  raw_bnchs <- lapply(bnch_list, FUN = "[[", "benchmark")
  summaries <- lapply(raw_bnchs, FUN = summary, unit = unit)
  # stack into a single data.frame, tagging rows with the list names
  dfs <- bind_rows(summaries, .id = id)
  # keep the id levels in their original list order on the x axis
  dfs[[id]] <- factor(dfs[[id]], levels = unique(dfs[[id]]))
  # simple barplot of the chosen summary statistic, one bar per expr
  ggplot(data = dfs,
         mapping = aes(x = !!sym(id),
                       y = !!sym(use),
                       fill = expr)) +
    geom_col(position = position_dodge()) +
    theme_minimal()
}
Identify the current project directory in R:
# project root as located by the here package (anchors relative paths)
project_dir <- here::here()
# all data files used below live under <project>/data
data_dir <- file.path(project_dir, "data")
Benchmarks for small files:
# small csv used for the read benchmarks
small_file <- file.path(data_dir, "demos_to_merge.csv")
# benchmark base read.csv, readr, vroom, and data.table::fread
bnch_read_small <- run_benchmark(
read.csv(small_file, header = TRUE),
read_csv(small_file),
vroom::vroom(small_file),
fread(small_file)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# print raw benchmark, summaries, and plot
bnch_read_small
## $benchmark
## Unit: milliseconds
## expr min lq mean median
## read.csv(small_file, header = TRUE) 1.282216 1.374542 1.453796 1.421444
## read_csv(small_file) 15.345299 15.997121 18.301205 16.627820
## vroom::vroom(small_file) 8.648039 9.088017 10.267576 9.398193
## fread(small_file) 1.018439 1.224679 1.342771 1.357349
## uq max neval
## 1.505419 2.773271 100
## 17.090287 153.541067 100
## 9.718265 81.681823 100
## 1.426810 3.170288 100
##
## $default
## expr min lq mean median
## 1 read.csv(small_file, header = TRUE) 1.282216 1.374542 1.453796 1.421444
## 2 read_csv(small_file) 15.345299 15.997121 18.301205 16.627820
## 3 vroom::vroom(small_file) 8.648039 9.088017 10.267576 9.398193
## 4 fread(small_file) 1.018439 1.224679 1.342771 1.357349
## uq max neval
## 1 1.505419 2.773271 100
## 2 17.090287 153.541067 100
## 3 9.718265 81.681823 100
## 4 1.426810 3.170288 100
##
## $relative
## expr min lq mean median
## 1 read.csv(small_file, header = TRUE) 1.259001 1.122369 1.082683 1.04722
## 2 read_csv(small_file) 15.067470 13.062297 13.629428 12.25021
## 3 vroom::vroom(small_file) 8.491465 7.420734 7.646556 6.92393
## 4 fread(small_file) 1.000000 1.000000 1.000000 1.00000
## uq max neval
## 1 1.055095 0.8747694 100
## 2 11.977974 48.4312678 100
## 3 6.811186 25.7647958 100
## 4 1.000000 1.0000000 100
##
## $plot
Benchmarks for large files:
# large csv used for the read benchmarks
large_file <- file.path(data_dir, "master_data_20210315.csv")
# same four readers, now on the large file
bnch_read_large <- run_benchmark(
read.csv(large_file, header = TRUE),
read_csv(large_file),
fread(large_file),
vroom::vroom(large_file)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# print raw benchmark, summaries, and plot
bnch_read_large
## $benchmark
## Unit: milliseconds
## expr min lq mean
## read.csv(large_file, header = TRUE) 1115.27395 1208.69211 1364.37843
## read_csv(large_file) 351.41107 387.80298 451.24115
## fread(large_file) 104.79205 118.67855 155.92661
## vroom::vroom(large_file) 28.79996 30.26235 32.98453
## median uq max neval
## 1323.45623 1444.0462 2579.52315 100
## 416.46413 478.0314 753.24136 100
## 124.54401 170.5779 463.35285 100
## 32.30613 34.5490 51.23582 100
##
## $default
## expr min lq mean
## 1 read.csv(large_file, header = TRUE) 1115.27395 1208.69211 1364.37843
## 2 read_csv(large_file) 351.41107 387.80298 451.24115
## 3 fread(large_file) 104.79205 118.67855 155.92661
## 4 vroom::vroom(large_file) 28.79996 30.26235 32.98453
## median uq max neval
## 1 1323.45623 1444.0462 2579.52315 100
## 2 416.46413 478.0314 753.24136 100
## 3 124.54401 170.5779 463.35285 100
## 4 32.30613 34.5490 51.23582 100
##
## $relative
## expr min lq mean median
## 1 read.csv(large_file, header = TRUE) 38.724840 39.940454 41.364189 40.96611
## 2 read_csv(large_file) 12.201789 12.814700 13.680387 12.89118
## 3 fread(large_file) 3.638618 3.921656 4.727265 3.85512
## 4 vroom::vroom(large_file) 1.000000 1.000000 1.000000 1.00000
## uq max neval
## 1 41.797048 50.346089 100
## 2 13.836332 14.701460 100
## 3 4.937274 9.043533 100
## 4 1.000000 1.000000 100
##
## $plot
Let’s read in the small set of data and the large set of data and divide it so that it can be used to benchmark what follows. Note that we’re not doing super-systematic benchmarks, just an example of large data and small data.
# Read a csv and drop its first column (the row-index column written out
# by the earlier analyses).
read_data <- function(path){
read.csv(path)[-1]
}
# 1. READING SMALL #
# we will use read.csv so as not to pick a fight with tidy vs data.table people
small_demos <- read_data(file.path(data_dir, "demos_to_merge.csv"))
small_scores <- read_data(file.path(data_dir, "scores_to_merge.csv"))
small_comb_1 <- read_data(file.path(data_dir, "data_to_rowbind.csv"))
# creating additional combination data for benchmarking
small_comb_2 <- merge(small_demos, small_scores)
small_comb_all <- rbind(small_comb_1,
small_comb_2)
# 2. READING LARGE #
large_comb_all <- read_data(large_file)
# 3. DIVIDING LARGE #
# dividing large_comb_all into several sets
n_rows_large <- nrow(large_comb_all)
# first ~20% of the rows become the subset used for the join benchmarks
merge_idx <- seq_len(ceiling(n_rows_large / 5))
large_comb_1 <- large_comb_all[-merge_idx, ]
large_comb_2 <- large_comb_all[merge_idx, ]
# carve demo/score tables (matching the small data's columns) out of it
large_demos <- large_comb_2[names(small_demos)]
large_scores <- large_comb_2[names(small_scores)]
First, let’s have some preliminary objects. 1. The variable naming the “ID” column on which we are going to merge. 2. A function to automatically perform the merges and return useful results.
# column on which all of the merges/joins below are performed
id_var <- "guid"

#' Benchmark inner/left/right/outer joins between x and y using base merge(),
#' dplyr's *_join() verbs, and data.table (merge() method and keyed [ joins).
#'
#' @param x,y data.frames to join.
#' @param by Column name(s) to join on.
#' @param keep Proportion of rows of x and y to retain (recycled to length 2
#'   and clamped to [0, 1]); sampling rows lets us benchmark joins where not
#'   all rows match.
#' @return A list of run_benchmark() results: inner, left, right, outer.
test_joins <- function(x, y,
                       by = "guid",
                       keep = 1){
  # make sure keep is length 2 and a valid proportion
  keep <- pmax(0, pmin(1, rep_len(keep, 2)))
  # sampling rows of x and y to match keep
  x_and_y <- Map(
    f = function(df, prob){
      n <- nrow(df)
      df[sample.int(n, size = ceiling(prob * n)), , drop = FALSE]
    },
    df = list(x = x, y = y),
    prob = keep
  )
  # pulling out x and y for purposes of merging
  x <- x_and_y$x
  y <- x_and_y$y
  # turning into data.tables and setting keys (needed for keyed [ joins)
  x_dt <- as.data.table(x)
  y_dt <- as.data.table(y)
  setkeyv(x_dt, by)
  setkeyv(y_dt, by)
  # inner join: only rows present in both tables
  bnch_inner <- run_benchmark(
    merge(x, y, by = by, all = FALSE),
    inner_join(x, y, by = by),
    merge(x_dt, y_dt, by = by, all = FALSE),
    x_dt[y_dt, nomatch = 0]
  )
  # left join: all rows of x. NOTE: in data.table, X[Y] keeps all rows of Y,
  # so the keyed equivalent of merge(x, y, all.x = TRUE) is y_dt[x_dt].
  # (The original code had x_dt[y_dt] / y_dt[x_dt] swapped between the left
  # and right benchmarks; the bug was masked when keep = 1 because every
  # key then matched in both tables.)
  bnch_left <- run_benchmark(
    merge(x, y, by = by, all.x = TRUE),
    left_join(x, y, by = by),
    merge(x_dt, y_dt, by = by, all.x = TRUE),
    y_dt[x_dt]
  )
  # right join: all rows of y, so the keyed equivalent is x_dt[y_dt]
  bnch_right <- run_benchmark(
    merge(x, y, by = by, all.y = TRUE),
    right_join(x, y, by = by),
    merge(x_dt, y_dt, by = by, all.y = TRUE),
    x_dt[y_dt]
  )
  # full/outer join: all rows of both tables (data.table's [ has no
  # one-step full join, so only merge() is benchmarked here)
  bnch_outer <- run_benchmark(
    merge(x, y, by = by, all = TRUE),
    full_join(x, y, by = by),
    merge(x_dt, y_dt, by = by, all = TRUE)
  )
  list(inner = bnch_inner,
       left = bnch_left,
       right = bnch_right,
       outer = bnch_outer)
}
Benchmarks for small data:
# join benchmarks on the small tables (keep defaults to 1: all rows kept)
bnch_join_small_1 <- test_joins(x = small_demos,
y = small_scores,
by = id_var)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# Inner join benchmarks (small data)
bnch_join_small_1$inner
## $benchmark
## Unit: microseconds
## expr min lq mean median
## merge(x, y, by = by, all = FALSE) 1598.126 1691.903 1801.2448 1812.8450
## inner_join(x, y, by = by) 1636.832 1820.959 2040.8562 1982.9700
## merge(x_dt, y_dt, by = by, all = FALSE) 1096.366 1178.578 1519.7472 1303.2040
## x_dt[y_dt, nomatch = 0] 708.224 735.743 910.1826 844.9145
## uq max neval
## 1899.7190 2342.695 100
## 2140.9245 7065.595 100
## 1392.3395 22953.444 100
## 875.5345 9050.191 100
##
## $default
## expr min lq mean median
## 1 merge(x, y, by = by, all = FALSE) 1.598126 1.691903 1.8012448 1.8128450
## 2 inner_join(x, y, by = by) 1.636832 1.820959 2.0408562 1.9829700
## 3 merge(x_dt, y_dt, by = by, all = FALSE) 1.096366 1.178578 1.5197472 1.3032040
## 4 x_dt[y_dt, nomatch = 0] 0.708224 0.735743 0.9101826 0.8449145
## uq max neval
## 1 1.8997190 2.342695 100
## 2 2.1409245 7.065595 100
## 3 1.3923395 22.953444 100
## 4 0.8755345 9.050191 100
##
## $relative
## expr min lq mean median
## 1 merge(x, y, by = by, all = FALSE) 2.256526 2.299584 1.978993 2.145596
## 2 inner_join(x, y, by = by) 2.311178 2.474993 2.242249 2.346948
## 3 merge(x_dt, y_dt, by = by, all = FALSE) 1.548050 1.601888 1.669717 1.542409
## 4 x_dt[y_dt, nomatch = 0] 1.000000 1.000000 1.000000 1.000000
## uq max neval
## 1 2.169782 0.2588559 100
## 2 2.445277 0.7807123 100
## 3 1.590274 2.5362386 100
## 4 1.000000 1.0000000 100
##
## $plot
# Left join benchmarks (small data)
bnch_join_small_1$left
## $benchmark
## Unit: microseconds
## expr min lq mean median
## merge(x, y, by = by, all.x = TRUE) 1584.065 1668.655 3000.9129 1717.7825
## left_join(x, y, by = by) 1621.288 1759.643 1880.9304 1841.5825
## merge(x_dt, y_dt, by = by, all.x = TRUE) 1097.691 1171.461 1220.1517 1211.9225
## x_dt[y_dt] 716.161 741.600 777.6728 755.3995
## uq max neval
## 1770.6570 128892.856 100
## 1976.4255 2565.177 100
## 1249.1685 1405.529 100
## 800.0455 1051.161 100
##
## $default
## expr min lq mean
## 1 merge(x, y, by = by, all.x = TRUE) 1.584065 1.668655 3.0009129
## 2 left_join(x, y, by = by) 1.621288 1.759643 1.8809304
## 3 merge(x_dt, y_dt, by = by, all.x = TRUE) 1.097691 1.171460 1.2201517
## 4 x_dt[y_dt] 0.716161 0.741600 0.7776728
## median uq max neval
## 1 1.7177825 1.7706570 128.892856 100
## 2 1.8415825 1.9764255 2.565177 100
## 3 1.2119225 1.2491685 1.405529 100
## 4 0.7553995 0.8000455 1.051161 100
##
## $relative
## expr min lq mean median
## 1 merge(x, y, by = by, all.x = TRUE) 2.211884 2.250075 3.858837 2.274005
## 2 left_join(x, y, by = by) 2.263860 2.372766 2.418665 2.437892
## 3 merge(x_dt, y_dt, by = by, all.x = TRUE) 1.532743 1.579639 1.568978 1.604346
## 4 x_dt[y_dt] 1.000000 1.000000 1.000000 1.000000
## uq max neval
## 1 2.213195 122.619519 100
## 2 2.470391 2.440327 100
## 3 1.561372 1.337121 100
## 4 1.000000 1.000000 100
##
## $plot
# Right join benchmarks (small data)
bnch_join_small_1$right
## $benchmark
## Unit: microseconds
## expr min lq mean
## merge(x, y, by = by, all.y = TRUE) 1589.690 1667.0275 1707.8532
## right_join(x, y, by = by) 1650.160 1833.8985 1928.8905
## merge(x_dt, y_dt, by = by, all.y = TRUE) 1643.661 1728.8990 2092.8514
## y_dt[x_dt] 705.300 733.5985 767.8416
## median uq max neval
## 1694.9705 1732.2710 1933.605 100
## 1901.2605 2002.3065 2289.361 100
## 1783.7465 1883.4460 30080.406 100
## 754.3565 790.9835 882.657 100
##
## $default
## expr min lq mean
## 1 merge(x, y, by = by, all.y = TRUE) 1.589690 1.6670275 1.7078532
## 2 right_join(x, y, by = by) 1.650160 1.8338985 1.9288905
## 3 merge(x_dt, y_dt, by = by, all.y = TRUE) 1.643661 1.7288990 2.0928514
## 4 y_dt[x_dt] 0.705300 0.7335985 0.7678416
## median uq max neval
## 1 1.6949705 1.7322710 1.933605 100
## 2 1.9012605 2.0023065 2.289361 100
## 3 1.7837465 1.8834460 30.080406 100
## 4 0.7543565 0.7909835 0.882657 100
##
## $relative
## expr min lq mean median
## 1 merge(x, y, by = by, all.y = TRUE) 2.253920 2.272398 2.224226 2.246909
## 2 right_join(x, y, by = by) 2.339657 2.499867 2.512094 2.520374
## 3 merge(x_dt, y_dt, by = by, all.y = TRUE) 2.330442 2.356737 2.725629 2.364594
## 4 y_dt[x_dt] 1.000000 1.000000 1.000000 1.000000
## uq max neval
## 1 2.190022 2.190664 100
## 2 2.531414 2.593715 100
## 3 2.381144 34.079383 100
## 4 1.000000 1.000000 100
##
## $plot
# Outer (full) join benchmarks (small data)
bnch_join_small_1$outer
## $benchmark
## Unit: milliseconds
## expr min lq mean median
## merge(x, y, by = by, all = TRUE) 1.587842 1.629756 1.888675 1.653839
## full_join(x, y, by = by) 1.653139 1.691276 1.820628 1.798546
## merge(x_dt, y_dt, by = by, all = TRUE) 1.591998 1.640868 1.729458 1.704767
## uq max neval
## 1.683472 24.432939 100
## 1.929545 2.236942 100
## 1.776987 2.286940 100
##
## $default
## expr min lq mean median
## 1 merge(x, y, by = by, all = TRUE) 1.587842 1.629756 1.888675 1.653839
## 2 full_join(x, y, by = by) 1.653139 1.691276 1.820628 1.798546
## 3 merge(x_dt, y_dt, by = by, all = TRUE) 1.591998 1.640868 1.729458 1.704767
## uq max neval
## 1 1.683472 24.432939 100
## 2 1.929545 2.236942 100
## 3 1.776987 2.286940 100
##
## $relative
## expr min lq mean median
## 1 merge(x, y, by = by, all = TRUE) 1.000000 1.000000 1.0000000 1.000000
## 2 full_join(x, y, by = by) 1.041123 1.037748 0.9639708 1.087498
## 3 merge(x_dt, y_dt, by = by, all = TRUE) 1.002617 1.006818 0.9156988 1.030794
## uq max neval
## 1 1.000000 1.00000000 100
## 2 1.146170 0.09155436 100
## 3 1.055549 0.09360069 100
##
## $plot
Benchmarks for large data:
# join benchmarks on the large tables (all rows kept)
bnch_join_large_1 <- test_joins(x = large_demos,
y = large_scores,
by = id_var)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# Inner join benchmarks (large data)
bnch_join_large_1$inner
## $benchmark
## Unit: milliseconds
## expr min lq mean
## merge(x, y, by = by, all = FALSE) 85.121168 85.892314 89.186439
## inner_join(x, y, by = by) 7.043479 7.509643 9.531737
## merge(x_dt, y_dt, by = by, all = FALSE) 5.374240 5.989276 6.385281
## x_dt[y_dt, nomatch = 0] 4.633596 5.405561 7.420612
## median uq max neval
## 86.604356 88.754589 118.10214 100
## 7.674637 8.049369 40.90420 100
## 6.109198 6.267143 31.83852 100
## 5.587108 5.764193 36.98498 100
##
## $default
## expr min lq mean
## 1 merge(x, y, by = by, all = FALSE) 85.121168 85.892314 89.186439
## 2 inner_join(x, y, by = by) 7.043479 7.509643 9.531737
## 3 merge(x_dt, y_dt, by = by, all = FALSE) 5.374240 5.989276 6.385281
## 4 x_dt[y_dt, nomatch = 0] 4.633596 5.405561 7.420612
## median uq max neval
## 1 86.604356 88.754589 118.10214 100
## 2 7.674637 8.049369 40.90420 100
## 3 6.109198 6.267143 31.83852 100
## 4 5.587108 5.764193 36.98498 100
##
## $relative
## expr min lq mean
## 1 merge(x, y, by = by, all = FALSE) 18.370434 15.889621 12.0187450
## 2 inner_join(x, y, by = by) 1.520089 1.389244 1.2844948
## 3 merge(x_dt, y_dt, by = by, all = FALSE) 1.159842 1.107984 0.8604791
## 4 x_dt[y_dt, nomatch = 0] 1.000000 1.000000 1.0000000
## median uq max neval
## 1 15.500749 15.397575 3.1932455 100
## 2 1.373633 1.396443 1.1059677 100
## 3 1.093445 1.087254 0.8608499 100
## 4 1.000000 1.000000 1.0000000 100
##
## $plot
# Left join benchmarks (large data)
bnch_join_large_1$left
## $benchmark
## Unit: milliseconds
## expr min lq mean
## merge(x, y, by = by, all.x = TRUE) 85.293129 85.735330 91.044625
## left_join(x, y, by = by) 6.978856 7.446243 8.954506
## merge(x_dt, y_dt, by = by, all.x = TRUE) 5.229084 5.625960 6.255178
## x_dt[y_dt] 4.401157 5.101157 5.763486
## median uq max neval
## 86.110446 86.696562 209.64710 100
## 7.627331 7.781408 41.31165 100
## 5.754943 5.894331 30.65941 100
## 5.257812 5.315555 33.60808 100
##
## $default
## expr min lq mean
## 1 merge(x, y, by = by, all.x = TRUE) 85.293129 85.735330 91.044625
## 2 left_join(x, y, by = by) 6.978856 7.446243 8.954506
## 3 merge(x_dt, y_dt, by = by, all.x = TRUE) 5.229084 5.625960 6.255178
## 4 x_dt[y_dt] 4.401157 5.101157 5.763486
## median uq max neval
## 1 86.110446 86.696562 209.64710 100
## 2 7.627331 7.781408 41.31165 100
## 3 5.754943 5.894331 30.65941 100
## 4 5.257812 5.315555 33.60808 100
##
## $relative
## expr min lq mean
## 1 merge(x, y, by = by, all.x = TRUE) 19.379706 16.807034 15.796797
## 2 left_join(x, y, by = by) 1.585687 1.459716 1.553661
## 3 merge(x_dt, y_dt, by = by, all.x = TRUE) 1.188116 1.102879 1.085312
## 4 x_dt[y_dt] 1.000000 1.000000 1.000000
## median uq max neval
## 1 16.377618 16.309974 6.2379975 100
## 2 1.450666 1.463894 1.2292179 100
## 3 1.094551 1.108884 0.9122632 100
## 4 1.000000 1.000000 1.0000000 100
##
## $plot
# Right join benchmarks (large data)
bnch_join_large_1$right
## $benchmark
## Unit: milliseconds
## expr min lq mean
## merge(x, y, by = by, all.y = TRUE) 85.130469 85.666714 90.357069
## right_join(x, y, by = by) 7.723282 8.531254 9.527223
## merge(x_dt, y_dt, by = by, all.y = TRUE) 7.670457 9.085331 10.313555
## y_dt[x_dt] 4.050937 5.052690 5.671638
## median uq max neval
## 86.091155 87.262653 118.58536 100
## 8.703039 8.956814 38.54915 100
## 9.281391 9.446704 41.19841 100
## 5.247997 5.385542 32.72982 100
##
## $default
## expr min lq mean
## 1 merge(x, y, by = by, all.y = TRUE) 85.130469 85.666714 90.357069
## 2 right_join(x, y, by = by) 7.723282 8.531254 9.527223
## 3 merge(x_dt, y_dt, by = by, all.y = TRUE) 7.670457 9.085331 10.313555
## 4 y_dt[x_dt] 4.050937 5.052690 5.671638
## median uq max neval
## 1 86.091155 87.262653 118.58536 100
## 2 8.703039 8.956814 38.54915 100
## 3 9.281391 9.446704 41.19841 100
## 4 5.247997 5.385542 32.72982 100
##
## $relative
## expr min lq mean
## 1 merge(x, y, by = by, all.y = TRUE) 21.015007 16.954673 15.931389
## 2 right_join(x, y, by = by) 1.906542 1.688458 1.679801
## 3 merge(x_dt, y_dt, by = by, all.y = TRUE) 1.893502 1.798117 1.818444
## 4 y_dt[x_dt] 1.000000 1.000000 1.000000
## median uq max neval
## 1 16.404574 16.203131 3.623159 100
## 2 1.658355 1.663122 1.177799 100
## 3 1.768559 1.754086 1.258742 100
## 4 1.000000 1.000000 1.000000 100
##
## $plot
# Outer (full) join benchmarks (large data)
bnch_join_large_1$outer
## $benchmark
## Unit: milliseconds
## expr min lq mean median
## merge(x, y, by = by, all = TRUE) 84.733308 85.510103 88.351556 85.882728
## full_join(x, y, by = by) 8.155158 8.506530 10.870472 8.690476
## merge(x_dt, y_dt, by = by, all = TRUE) 8.190572 8.767346 9.759042 9.013307
## uq max neval
## 86.452453 115.47279 100
## 8.978945 38.10126 100
## 9.179148 41.56250 100
##
## $default
## expr min lq mean
## 1 merge(x, y, by = by, all = TRUE) 84.733308 85.510103 88.351556
## 2 full_join(x, y, by = by) 8.155158 8.506530 10.870472
## 3 merge(x_dt, y_dt, by = by, all = TRUE) 8.190572 8.767346 9.759042
## median uq max neval
## 1 85.882728 86.452453 115.47279 100
## 2 8.690476 8.978945 38.10126 100
## 3 9.013307 9.179148 41.56250 100
##
## $relative
## expr min lq mean median
## 1 merge(x, y, by = by, all = TRUE) 10.390149 10.052290 8.127665 9.882396
## 2 full_join(x, y, by = by) 1.000000 1.000000 1.000000 1.000000
## 3 merge(x_dt, y_dt, by = by, all = TRUE) 1.004343 1.030661 0.897757 1.037148
## uq max neval
## 1 9.628353 3.030682 100
## 2 1.000000 1.000000 100
## 3 1.022297 1.090843 100
##
## $plot
We can look at what happens when each table retains only a certain proportion of the full data (so that not all rows match across tables).
# fix the RNG so the row sampling inside test_joins() is reproducible
set.seed(888)
# retention proportions, named by themselves (setNames(nm = x) defaults
# `object` to `nm`, yielding a self-named vector for nice list names)
keep_perc <- setNames(nm = seq(.2, .8, by = .2))
# running benchmarks
# each keep_perc element is passed as test_joins()'s first unmatched
# argument, i.e. `keep`
bnch_join_large_2 <- lapply(X = keep_perc,
FUN = test_joins,
x = large_demos,
y = large_scores,
by = id_var)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# rearranging outer and inner lists
# transpose the nesting from <percent> -> <join type> to
# <join type> -> <percent>; Map(list, ...) zips matching elements
bnch_join_large_2 <- do.call(what = Map,
args = c(list(f = list), bnch_join_large_2))
# creating plots of all of the benchmarks (one per join type)
bnch_join_large_2_gg <- lapply(X = bnch_join_large_2,
FUN = plot_benchmarks,
id = "percent_missing",
unit = "relative",
use = "median")
# Inner join plot across retention proportions
bnch_join_large_2_gg$inner
# Left join plot across retention proportions
bnch_join_large_2_gg$left
# Right join plot across retention proportions
bnch_join_large_2_gg$right
# Outer join plot across retention proportions
bnch_join_large_2_gg$outer
First, let’s have a preliminary object: a function that automatically splits the data, binds it back together, and returns useful results.
#' Split a data.frame into n_splits row groups, then benchmark re-binding
#' them with base rbind(), dplyr::bind_rows(), and data.table::rbindlist().
#'
#' @param x A data.frame to split and re-bind.
#' @param n_splits Number of row groups to create.
#' @return A run_benchmark() result list.
test_binds <- function(x,
                       n_splits = 2){
  # assign rows to groups 1..n_splits in a repeating pattern
  grp <- rep_len(seq_len(n_splits), nrow(x))
  # split into a list of n_splits data.frames
  x <- split.data.frame(x = x,
                        f = grp)
  # binding everything together again
  run_benchmark(
    do.call(rbind, x),
    bind_rows(x),
    rbindlist(x)
  )
}
Specifying the total number of groups we always want to compare.
n_groups <- setNames(nm = seq(2, 102, by = 10))
Benchmarks for small data:
# bind benchmarks on the small data; each n_groups element is passed as
# test_binds()'s first unmatched argument, i.e. n_splits
bnch_bind_small <- lapply(X = n_groups,
FUN = test_binds,
x = small_comb_all)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# relative median times by number of groups
plot_benchmarks(
bnch_list = bnch_bind_small,
id = "n_groups",
unit = "relative",
use = "median"
)
Benchmarks for large data:
# bind benchmarks on the large data; n_groups elements map to n_splits
bnch_bind_large <- lapply(X = n_groups,
FUN = test_binds,
x = large_comb_all)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# relative median times by number of groups
plot_benchmarks(
bnch_list = bnch_bind_large,
id = "n_groups",
unit = "relative",
use = "median"
)
Let’s set up some preliminary objects shared by all of the reshaping code: 1. the names of the variable (key) column and the value column, and 2. the names of the wide-format score columns to be stacked.
# Column names used by every reshaping helper below.
var_name <- "trait"
val_name <- "normed_score"
# Wide-format score columns to stack: everything named like X1, X2, ...
var_vals <- grep("^X[0-9]+", names(small_scores), value = TRUE)
We can add functions to quickly reshape data and make it easier to apply.
# Wide -> long using base R's stats::reshape().
# Depends on globals defined above: var_vals (the wide columns to stack),
# val_name / var_name (names for the resulting value and key columns),
# and small_demos, whose column names identify each row.
# NOTE(review): assumes `df` shares the small_comb_all column layout.
wide_to_long_base <- function(df){
reshape(df,
varying = var_vals,
v.names = val_name,
timevar = var_name,
idvar = names(small_demos),
direction = "long")
}
# Wide -> long using tidyr::gather(), kept deliberately (although it is
# superseded by pivot_longer()) so both tidyr interfaces are benchmarked.
# `!!` unquotes the column-name strings defined above; one_of() selects
# the score columns from the character vector var_vals.
wide_to_long_tidy_gather <- function(df){
gather(df,
key = !!var_name,
value = !!val_name,
one_of(var_vals))
}
# Wide -> long using tidyr::pivot_longer().
# Select the stacked columns with all_of(): passing the bare external
# character vector `var_vals` to a tidyselect argument is ambiguous
# (column vs. environment lookup) and deprecated since tidyselect 1.1.
wide_to_long_tidy_pivot <- function(df){
pivot_longer(df,
cols = all_of(var_vals),
names_to = var_name,
values_to = val_name)
}
# Wide -> long using data.table::melt() on a data.table copy of `df`.
wide_to_long_dt <- function(df){
dt <- as.data.table(df)
melt(dt,
measure.vars = var_vals,
variable.name = var_name,
value.name = val_name)
}
Benchmarks for small data:
# Time each wide-to-long implementation on the small data.
bnch_shape_long_small <- run_benchmark(
wide_to_long_base(small_comb_all),
wide_to_long_tidy_gather(small_comb_all),
wide_to_long_tidy_pivot(small_comb_all),
wide_to_long_dt(small_comb_all)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
bnch_shape_long_small
## $benchmark
## Unit: microseconds
## expr min lq mean
## wide_to_long_base(small_comb_all) 67637.610 69233.5815 72719.6865
## wide_to_long_tidy_gather(small_comb_all) 7435.185 7677.5060 8275.2071
## wide_to_long_tidy_pivot(small_comb_all) 4850.805 5241.1185 5563.4792
## wide_to_long_dt(small_comb_all) 522.808 618.4355 672.4733
## median uq max neval
## 71039.671 72829.106 124009.333 100
## 7786.309 7962.420 45648.844 100
## 5459.282 5718.937 10846.045 100
## 652.936 683.615 2608.079 100
##
## $default
## expr min lq mean
## 1 wide_to_long_base(small_comb_all) 67.637610 69.2335815 72.7196865
## 2 wide_to_long_tidy_gather(small_comb_all) 7.435185 7.6775060 8.2752071
## 3 wide_to_long_tidy_pivot(small_comb_all) 4.850805 5.2411185 5.5634792
## 4 wide_to_long_dt(small_comb_all) 0.522808 0.6184355 0.6724733
## median uq max neval
## 1 71.039671 72.829106 124.009333 100
## 2 7.786309 7.962420 45.648844 100
## 3 5.459282 5.718937 10.846045 100
## 4 0.652936 0.683615 2.608079 100
##
## $relative
## expr min lq mean
## 1 wide_to_long_base(small_comb_all) 129.373709 111.949559 108.137664
## 2 wide_to_long_tidy_gather(small_comb_all) 14.221636 12.414401 12.305630
## 3 wide_to_long_tidy_pivot(small_comb_all) 9.278368 8.474802 8.273161
## 4 wide_to_long_dt(small_comb_all) 1.000000 1.000000 1.000000
## median uq max neval
## 1 108.800359 106.535267 47.548151 100
## 2 11.925072 11.647521 17.502861 100
## 3 8.361128 8.365728 4.158634 100
## 4 1.000000 1.000000 1.000000 100
##
## $plot
Benchmarks for large data:
# Time each wide-to-long implementation on the large data.
bnch_shape_long_large <- run_benchmark(
wide_to_long_base(large_comb_all),
wide_to_long_tidy_gather(large_comb_all),
wide_to_long_tidy_pivot(large_comb_all),
wide_to_long_dt(large_comb_all)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
bnch_shape_long_large
## $benchmark
## Unit: milliseconds
## expr min lq mean
## wide_to_long_base(large_comb_all) 18192.39325 18939.07560 25267.0711
## wide_to_long_tidy_gather(large_comb_all) 718.68953 725.87641 875.2249
## wide_to_long_tidy_pivot(large_comb_all) 79.62178 88.19576 320.8779
## wide_to_long_dt(large_comb_all) 32.82118 43.37826 162.7716
## median uq max neval
## 21315.15178 29139.91710 52758.411 100
## 729.34497 737.47770 8291.349 100
## 93.06330 95.30745 7373.518 100
## 44.94552 46.47995 3040.080 100
##
## $default
## expr min lq mean
## 1 wide_to_long_base(large_comb_all) 18192.39325 18939.07560 25267.0711
## 2 wide_to_long_tidy_gather(large_comb_all) 718.68953 725.87641 875.2249
## 3 wide_to_long_tidy_pivot(large_comb_all) 79.62178 88.19576 320.8779
## 4 wide_to_long_dt(large_comb_all) 32.82118 43.37826 162.7716
## median uq max neval
## 1 21315.15178 29139.91710 52758.411 100
## 2 729.34497 737.47770 8291.349 100
## 3 93.06330 95.30745 7373.518 100
## 4 44.94552 46.47995 3040.080 100
##
## $relative
## expr min lq mean
## 1 wide_to_long_base(large_comb_all) 554.288130 436.602934 155.230190
## 2 wide_to_long_tidy_gather(large_comb_all) 21.897123 16.733645 5.377012
## 3 wide_to_long_tidy_pivot(large_comb_all) 2.425926 2.033179 1.971338
## 4 wide_to_long_dt(large_comb_all) 1.000000 1.000000 1.000000
## median uq max neval
## 1 474.24415 626.935248 17.354285 100
## 2 16.22731 15.866578 2.727346 100
## 3 2.07058 2.050507 2.425436 100
## 4 1.00000 1.000000 1.000000 100
##
## $plot
We can add functions to quickly reshape the data back to wide format, and we first create the long data sets up front so every implementation is benchmarked on identical inputs.
# Materialize the long-format inputs once (converted back to plain
# data.frames) so every long-to-wide implementation starts from an
# identical object; wide_to_long_dt() is used only because it was the
# fastest reshaper above.
small_comb_long_all <- as.data.frame(wide_to_long_dt(small_comb_all))
large_comb_long_all <- as.data.frame(wide_to_long_dt(large_comb_all))
# Long -> wide using base R's stats::reshape().
# Depends on globals: val_name / var_name (the value and key columns)
# and small_demos, whose column names identify each output row.
long_to_wide_base <- function(df){
reshape(df,
v.names = val_name,
timevar = var_name,
idvar = names(small_demos),
direction = "wide")
}
# Long -> wide using tidyr::spread(), the inverse of gather() (both are
# superseded by the pivot_* functions but benchmarked here on purpose).
# The "_gather" suffix keeps the name parallel with the wide-to-long
# helpers even though the call itself is spread().
long_to_wide_tidy_gather <- function(df){
spread(df,
key = !!var_name,
value = !!val_name)
}
# Long -> wide using tidyr::pivot_wider().
# Use all_of() for the tidyselect arguments instead of `!!` on the
# column-name strings: unquoting external strings in selections is
# discouraged in favor of all_of() (tidyselect documentation).
long_to_wide_tidy_pivot <- function(df){
pivot_wider(df,
names_from = all_of(var_name),
values_from = all_of(val_name))
}
# Long -> wide using data.table::dcast().
long_to_wide_dt <- function(df){
# "... ~ trait": spread the key column, keeping all other columns as ids.
wide_formula <- as.formula(paste("... ~", var_name))
dcast(as.data.table(df), wide_formula, value.var = val_name)
}
Benchmarks for small data:
# Time each long-to-wide implementation on the small data.
bnch_shape_wide_small <- run_benchmark(
long_to_wide_base(small_comb_long_all),
long_to_wide_tidy_gather(small_comb_long_all),
long_to_wide_tidy_pivot(small_comb_long_all),
long_to_wide_dt(small_comb_long_all)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
bnch_shape_wide_small
## $benchmark
## Unit: milliseconds
## expr min lq mean
## long_to_wide_base(small_comb_long_all) 70.051042 72.544492 85.502821
## long_to_wide_tidy_gather(small_comb_long_all) 6.403182 7.101287 7.326382
## long_to_wide_tidy_pivot(small_comb_long_all) 3.944102 4.844976 5.077928
## long_to_wide_dt(small_comb_long_all) 3.918392 4.743412 4.905742
## median uq max neval
## 74.647927 76.616593 1165.753955 100
## 7.286821 7.474488 9.388345 100
## 5.024573 5.360321 7.031711 100
## 4.877147 5.047589 7.186544 100
##
## $default
## expr min lq mean
## 1 long_to_wide_base(small_comb_long_all) 70.051042 72.544492 85.502821
## 2 long_to_wide_tidy_gather(small_comb_long_all) 6.403182 7.101287 7.326382
## 3 long_to_wide_tidy_pivot(small_comb_long_all) 3.944102 4.844976 5.077928
## 4 long_to_wide_dt(small_comb_long_all) 3.918392 4.743412 4.905742
## median uq max neval
## 1 74.647927 76.616593 1165.753955 100
## 2 7.286821 7.474488 9.388345 100
## 3 5.024573 5.360321 7.031711 100
## 4 4.877147 5.047589 7.186544 100
##
## $relative
## expr min lq mean
## 1 long_to_wide_base(small_comb_long_all) 17.877497 15.293735 17.429133
## 2 long_to_wide_tidy_gather(small_comb_long_all) 1.634135 1.497084 1.493430
## 3 long_to_wide_tidy_pivot(small_comb_long_all) 1.006561 1.021411 1.035099
## 4 long_to_wide_dt(small_comb_long_all) 1.000000 1.000000 1.000000
## median uq max neval
## 1 15.305655 15.178849 162.2134304 100
## 2 1.494075 1.480804 1.3063783 100
## 3 1.030228 1.061957 0.9784552 100
## 4 1.000000 1.000000 1.0000000 100
##
## $plot
Benchmarks for large data:
# Time each long-to-wide implementation on the large data.
bnch_shape_wide_large <- run_benchmark(
long_to_wide_base(large_comb_long_all),
long_to_wide_tidy_gather(large_comb_long_all),
long_to_wide_tidy_pivot(large_comb_long_all),
long_to_wide_dt(large_comb_long_all)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
bnch_shape_wide_large
## $benchmark
## Unit: milliseconds
## expr min lq mean
## long_to_wide_base(large_comb_long_all) 18574.4783 20588.2739 26376.0326
## long_to_wide_tidy_gather(large_comb_long_all) 944.4465 982.2077 1522.8837
## long_to_wide_tidy_pivot(large_comb_long_all) 137.2820 148.1601 316.1469
## long_to_wide_dt(large_comb_long_all) 319.9624 331.2554 606.2003
## median uq max neval
## 22432.8338 33143.0771 44232.265 100
## 999.8674 2537.2729 4187.677 100
## 150.1174 153.0628 6315.400 100
## 335.4374 341.9275 4530.148 100
##
## $default
## expr min lq
## 1 long_to_wide_base(large_comb_long_all) 18574.4783 20588.2739
## 2 long_to_wide_tidy_gather(large_comb_long_all) 944.4465 982.2077
## 3 long_to_wide_tidy_pivot(large_comb_long_all) 137.2820 148.1601
## 4 long_to_wide_dt(large_comb_long_all) 319.9624 331.2554
## mean median uq max neval
## 1 26376.0326 22432.8338 33143.0771 44232.265 100
## 2 1522.8837 999.8674 2537.2729 4187.677 100
## 3 316.1469 150.1174 153.0628 6315.400 100
## 4 606.2003 335.4374 341.9275 4530.148 100
##
## $relative
## expr min lq mean
## 1 long_to_wide_base(large_comb_long_all) 135.301612 138.959628 83.429686
## 2 long_to_wide_tidy_gather(large_comb_long_all) 6.879608 6.629366 4.817014
## 3 long_to_wide_tidy_pivot(large_comb_long_all) 1.000000 1.000000 1.000000
## 4 long_to_wide_dt(large_comb_long_all) 2.330694 2.235793 1.917464
## median uq max neval
## 1 149.435306 216.532552 7.0038735 100
## 2 6.660571 16.576680 0.6630897 100
## 3 1.000000 1.000000 1.0000000 100
## 4 2.234501 2.233903 0.7173176 100
##
## $plot
Let’s re-declare the preliminary objects shared by all of the aggregation code: 1. the names of the variable (key) column and the value column, and 2. the names of the wide-format score columns.
# Same preliminary objects as in the reshaping section, re-declared so
# the aggregation benchmarks can be run on their own.
var_name <- "trait"
val_name <- "normed_score"
var_vals <- grep("^X[0-9]+", names(small_scores), value = TRUE)
We can add functions to quickly aggregate data and make it easier to apply (using NSE to remove overhead of pasting and creating formulas).
# Aggregate the long data with base R: one aggregate() pass for the mean
# and one for the sd of normed_score within each data_level x trait cell.
# Returns a two-element list of data.frames (mean first, sd second).
aggr_long_base <- function(df){
# Do NOT name the local result `sd`: that shadows stats::sd, so a mere
# reordering of these statements would silently break `FUN = sd`.
mn_df <- aggregate(normed_score ~ data_level + trait,
FUN = mean,
data = df)
sd_df <- aggregate(normed_score ~ data_level + trait,
FUN = sd,
data = df)
list(mn_df, sd_df)
}
# Aggregate the long data with dplyr: mean and sd of normed_score per
# data_level x trait cell.
# NOTE(review): no `.groups` argument, so summarize() emits a grouping
# message and the result stays grouped by data_level -- harmless here
# because run_benchmark() suppresses messages and discards results, but
# worth confirming if this helper is reused elsewhere.
aggr_long_tidy <- function(df){
df %>%
group_by(data_level, trait) %>%
summarize(mean_score = mean(normed_score),
sd_score = sd(normed_score))
}
# Aggregate the long data with data.table: a single grouped pass that
# computes both summary statistics per data_level x trait cell.
aggr_long_dt <- function(df){
as.data.table(df)[, .(mean_score = mean(normed_score),
sd_score = sd(normed_score)),
by = .(data_level, trait)]
}
Benchmarks for small data:
# Time each long-format aggregation implementation on the small data.
bnch_aggr_long_small <- run_benchmark(
aggr_long_base(small_comb_long_all),
aggr_long_tidy(small_comb_long_all),
aggr_long_dt(small_comb_long_all)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
bnch_aggr_long_small
## $benchmark
## Unit: milliseconds
## expr min lq mean median
## aggr_long_base(small_comb_long_all) 9.531267 9.752885 9.986650 9.843245
## aggr_long_tidy(small_comb_long_all) 5.465912 5.945051 6.347606 6.285369
## aggr_long_dt(small_comb_long_all) 1.156154 1.331351 1.451090 1.444099
## uq max neval
## 10.067502 12.551216 100
## 6.447125 12.352523 100
## 1.525438 4.935919 100
##
## $default
## expr min lq mean median
## 1 aggr_long_base(small_comb_long_all) 9.531267 9.752885 9.986650 9.843245
## 2 aggr_long_tidy(small_comb_long_all) 5.465912 5.945051 6.347606 6.285369
## 3 aggr_long_dt(small_comb_long_all) 1.156154 1.331351 1.451090 1.444099
## uq max neval
## 1 10.067502 12.551216 100
## 2 6.447125 12.352523 100
## 3 1.525438 4.935919 100
##
## $relative
## expr min lq mean median
## 1 aggr_long_base(small_comb_long_all) 8.243942 7.325553 6.882171 6.816184
## 2 aggr_long_tidy(small_comb_long_all) 4.727668 4.465426 4.374371 4.352450
## 3 aggr_long_dt(small_comb_long_all) 1.000000 1.000000 1.000000 1.000000
## uq max neval
## 1 6.599745 2.542833 100
## 2 4.226409 2.502578 100
## 3 1.000000 1.000000 100
##
## $plot
Benchmarks for large data:
# Time each long-format aggregation implementation on the large data.
bnch_aggr_long_large <- run_benchmark(
aggr_long_base(large_comb_long_all),
aggr_long_tidy(large_comb_long_all),
aggr_long_dt(large_comb_long_all)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
bnch_aggr_long_large
## $benchmark
## Unit: milliseconds
## expr min lq mean median
## aggr_long_base(large_comb_long_all) 947.47474 1085.45412 1103.46696 1109.38487
## aggr_long_tidy(large_comb_long_all) 38.65261 43.65946 47.83091 44.88564
## aggr_long_dt(large_comb_long_all) 59.31455 74.63615 81.96242 76.36451
## uq max neval
## 1135.51806 1440.8471 100
## 45.69506 225.3857 100
## 78.32696 252.7661 100
##
## $default
## expr min lq mean
## 1 aggr_long_base(large_comb_long_all) 947.47474 1085.45412 1103.46696
## 2 aggr_long_tidy(large_comb_long_all) 38.65261 43.65946 47.83091
## 3 aggr_long_dt(large_comb_long_all) 59.31455 74.63615 81.96242
## median uq max neval
## 1 1109.38487 1135.51806 1440.8471 100
## 2 44.88564 45.69506 225.3857 100
## 3 76.36451 78.32696 252.7661 100
##
## $relative
## expr min lq mean median
## 1 aggr_long_base(large_comb_long_all) 24.512571 24.861833 23.070164 24.715807
## 2 aggr_long_tidy(large_comb_long_all) 1.000000 1.000000 1.000000 1.000000
## 3 aggr_long_dt(large_comb_long_all) 1.534555 1.709507 1.713587 1.701313
## uq max neval
## 1 24.849911 6.392805 100
## 2 1.000000 1.000000 100
## 3 1.714123 1.121482 100
##
## $plot
We can add functions to quickly aggregate the wide data and make them easier to apply (columns are selected directly here, so there is no formula-construction overhead to worry about).
# Aggregate the wide data with base R: column-wise mean and sd of every
# score column (global var_vals, defined above) within each data_level.
# Returns a two-element list of data.frames (mean first, sd second).
aggr_wide_base <- function(df){
# Do NOT name the local result `sd`: that shadows stats::sd, so a mere
# reordering of these statements would silently break `FUN = sd`.
mn_df <- aggregate(x = df[var_vals],
by = df[c("data_level")],
FUN = mean)
sd_df <- aggregate(x = df[var_vals],
by = df[c("data_level")],
FUN = sd)
list(mn_df, sd_df)
}
# Aggregate the wide data with dplyr: for each data_level, compute the
# mean and sd of every score column selected via all_of(var_vals);
# across() names the outputs <col>_mean / <col>_sd.
aggr_wide_tidy <- function(df){
df %>%
group_by(data_level) %>%
summarize(across(.cols = all_of(var_vals),
.fns = list(mean = mean,
sd = sd)))
}
# Aggregate the wide data with data.table: for each data_level, apply
# mean and sd over every score column (.SDcols = var_vals), prefixing
# the output column names with "mean_" / "sd_" via setNames() on .SD.
aggr_wide_dt <- function(df){
dt <- as.data.table(df)
dt[, c(lapply(setNames(.SD, paste0("mean_", names(.SD))), mean),
lapply(setNames(.SD, paste0("sd_", names(.SD))), sd)),
by = data_level,
.SDcols = var_vals]
}
Benchmarks for small data:
# Time each wide-format aggregation implementation on the small data.
bnch_aggr_wide_small <- run_benchmark(
aggr_wide_base(small_comb_all),
aggr_wide_tidy(small_comb_all),
aggr_wide_dt(small_comb_all)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
bnch_aggr_wide_small
## $benchmark
## Unit: milliseconds
## expr min lq mean median uq
## aggr_wide_base(small_comb_all) 4.317722 4.523416 4.664629 4.580209 4.712265
## aggr_wide_tidy(small_comb_all) 5.958957 6.501841 7.834972 6.790501 7.117837
## aggr_wide_dt(small_comb_all) 1.790795 1.910086 2.073120 2.006660 2.096089
## max neval
## 9.510568 100
## 104.714960 100
## 7.889932 100
##
## $default
## expr min lq mean median uq
## 1 aggr_wide_base(small_comb_all) 4.317722 4.523416 4.664629 4.580209 4.712265
## 2 aggr_wide_tidy(small_comb_all) 5.958957 6.501841 7.834972 6.790501 7.117837
## 3 aggr_wide_dt(small_comb_all) 1.790795 1.910086 2.073120 2.006660 2.096089
## max neval
## 1 9.510568 100
## 2 104.714960 100
## 3 7.889932 100
##
## $relative
## expr min lq mean median uq
## 1 aggr_wide_base(small_comb_all) 2.411064 2.368174 2.250053 2.282504 2.248123
## 2 aggr_wide_tidy(small_comb_all) 3.327548 3.403952 3.779315 3.383982 3.395771
## 3 aggr_wide_dt(small_comb_all) 1.000000 1.000000 1.000000 1.000000 1.000000
## max neval
## 1 1.205406 100
## 2 13.271972 100
## 3 1.000000 100
##
## $plot
Benchmarks for large data:
# Time each wide-format aggregation implementation on the large data.
bnch_aggr_wide_large <- run_benchmark(
aggr_wide_base(large_comb_all),
aggr_wide_tidy(large_comb_all),
aggr_wide_dt(large_comb_all)
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
bnch_aggr_wide_large
## $benchmark
## Unit: milliseconds
## expr min lq mean median
## aggr_wide_base(large_comb_all) 227.13527 229.21543 249.78201 230.98965
## aggr_wide_tidy(large_comb_all) 18.04147 19.06485 19.42071 19.36601
## aggr_wide_dt(large_comb_all) 16.77064 17.34240 20.76989 17.78978
## uq max neval
## 232.86863 470.24675 100
## 19.76823 20.84071 100
## 18.08370 177.43426 100
##
## $default
## expr min lq mean median
## 1 aggr_wide_base(large_comb_all) 227.13527 229.21543 249.78201 230.98965
## 2 aggr_wide_tidy(large_comb_all) 18.04147 19.06485 19.42071 19.36601
## 3 aggr_wide_dt(large_comb_all) 16.77064 17.34240 20.76989 17.78978
## uq max neval
## 1 232.86863 470.24675 100
## 2 19.76823 20.84071 100
## 3 18.08370 177.43426 100
##
## $relative
## expr min lq mean median
## 1 aggr_wide_base(large_comb_all) 13.543626 13.21705 12.0261590 12.984402
## 2 aggr_wide_tidy(large_comb_all) 1.075777 1.09932 0.9350417 1.088603
## 3 aggr_wide_dt(large_comb_all) 1.000000 1.00000 1.0000000 1.000000
## uq max neval
## 1 12.877264 2.650259 100
## 2 1.093152 0.117456 100
## 3 1.000000 1.000000 100
##
## $plot